Description of the project

Setup:

Data:

Parliamentarians

Tweets

Data processing

Plots:

Social media usage of German political parties in 2021

# Tweets created after the start of 2021, with party labels collapsed into the
# groups used for plotting. Labels that are not listed keep their value.
tweetTimelineData <- mainDataWithTweets %>%
  filter(created_at.x > as.POSIXct("2021-01-01")) %>%
  mutate(
    Partei = case_when(
      Partei %in% c("CSU", "CDU") ~ "CDU/CSU",
      Partei == "FDP/DVP" ~ "FDP",
      Partei %in% c("FW", "fraktionslos", "SSW", "BIW", "BVB/FW") ~ "Andere",
      TRUE ~ Partei
    )
  )
# Named colour lookup: one colour per party group, keyed by the group label.
partei_farben <- c(
  "CDU/CSU" = "black",
  "SPD"     = "#E30013",
  "FDP"     = "#FFDD00",
  "LINKE"   = "#BD3075",
  "GRÜNE"   = "#19A229",
  "AfD"     = "#009FE1",
  "Andere"  = "grey"
)

# Stacked frequency polygon of tweet timestamps, one line per party group,
# rendered interactively through plotly.
timeline <- ggplot(tweetTimelineData) +
  geom_freqpoly(
    aes(created_at.x, color = Partei),
    position = "stack",
    # Explicit one-week bins (for date-time data the width is given in
    # seconds) instead of the arbitrary default of 30 bins.
    binwidth = 7 * 24 * 60 * 60
  ) +
  # TODO: adjust so that the time labels on the axis are displayed properly
  scale_x_datetime(date_breaks = "1 month") +
  scale_color_manual(values = partei_farben) +
  labs(
    title = "Tweetsvorkommen der Politiker",
    subtitle = "Zeitverlauf 2021",
    x = "Zeit",
    y = "Anzahl Tweets",
    # The colour aesthetic encodes the party, so the legend is titled
    # accordingly (it used to read "Retweet").
    color = "Partei"
  )

ggplotly(timeline)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Word cloud of commonly used hashtags of German political parties in 2021

# Word cloud of hashtags, one facet per party; word area scales with
# `no_hashtags`.
ggplot(
  topHashtags,
  aes(label = hashtags, size = no_hashtags)
) +
  geom_text_wordcloud_area(
    area_corr = TRUE,
    shape = "square",
    # Spelled out as TRUE: `T` is an ordinary variable that can be reassigned.
    rm_outside = TRUE,
    area_corr_power = 1
  ) +
  # To revert, change rm_outside to FALSE and max_size to 15
  scale_size_area(max_size = 19) +
  theme_minimal() +
  # NOTE(review): no colour aesthetic is mapped in aes(), so this scale
  # currently has no visible effect. Map `color` to a variable if the
  # gradient is wanted.
  scale_color_gradient(low = "black", high = "lightblue") +
  labs(title = "Die meist verwendeten Hashtags der Parteien") +
  facet_wrap(~Partei)
## Some words could not fit on page. They have been removed.
## Some words could not fit on page. They have been removed.
## Some words could not fit on page. They have been removed.
## Some words could not fit on page. They have been removed.
## Some words could not fit on page. They have been removed.
## Some words could not fit on page. They have been removed.

Twitter Usage per state in 2021 (Map test)

# References:
# https://cran.r-project.org/web/packages/leaflet.minicharts/vignettes/introduction.html
# https://r-spatial.github.io/mapview/articles/articles/mapview_04-popups.html

# Number of rows of `mainDataWithState` per Bundesland, stored as `no_tweets`.
# The "Deutschland" and "EU-Parlament" entries are excluded from the map data.
#
# The earlier version also recoded `Partei`, but that column was dropped right
# afterwards, so the recode had no effect on the result and is removed.
# dplyr::count() replaces the grouped mutate(n()) + distinct() combination and
# returns an ungrouped table with one row per Bundesland.
mapData <- mainDataWithState %>%
  filter(
    Bundesland != "Deutschland",
    Bundesland != "EU-Parlament"
  ) %>%
  dplyr::count(Bundesland, name = "no_tweets")

# The three rows with the highest `no_hashtags` per Bundesland and Partei.
# with_ties = FALSE caps every group at exactly three rows; the default
# (with_ties = TRUE) returns all tied rows and could therefore list more than
# three hashtags under a "Top 3" heading. The result is ungrouped so that later
# steps do not inherit the grouping by accident.
top3Hashtags <- topHashtags %>%
  group_by(Bundesland, Partei) %>%
  slice_max(order_by = no_hashtags, n = 3, with_ties = FALSE) %>%
  ungroup()

# Two-level nesting of the top hashtags: first the hashtags are nested per
# Partei/Bundesland pair, then those per-party rows are nested per Bundesland.
nestedHashtags <- local({
  per_party <- top3Hashtags %>%
    dplyr::select(Partei, Bundesland, hashtags) %>%
    group_by(Partei, Bundesland) %>%
    nest()

  per_party %>%
    group_by(Bundesland) %>%
    nest()
})

# One HTML snippet per Bundesland listing the top hashtags of each party:
# "<strong>Partei: </strong>#tag1, #tag2, #tag3<br>" repeated for every party.
hashtagHTML <- top3Hashtags %>%
  ungroup() %>%
  dplyr::select(-screen_name) %>%
  group_by(Bundesland, Partei) %>%
  mutate(
    # All hashtags of the group joined into a single comma-separated string.
    hashtags3 = paste0("#", hashtags, collapse = ", "),
    partei = paste0("<strong>", Partei, ": </strong>")
  ) %>%
  # Every row of a group now carries the same strings; keep one per group.
  distinct(Partei, Bundesland, .keep_all = TRUE) %>%
  mutate(html = paste0(partei, hashtags3, "<br>")) %>%
  group_by(Bundesland) %>%
  # Concatenate the per-party lines of a Bundesland into one snippet.
  mutate(html = paste0(html, collapse = "")) %>%
  distinct(Bundesland, html)
  # Earlier draft, kept for reference:
  #mutate(hashtags3 = paste0(html,hashtags,"<br>", collapse = ", "))

# Load the level-1 GADM boundaries for Germany (files are stored under
# ./data/mapfiles), convert them to sf and attach the per-Bundesland tweet
# counts and hashtag HTML by matching NAME_1 against Bundesland.
de_map <- getData(
  "GADM",
  country = "Germany",
  level = 1,
  path = "./data/mapfiles"
) %>%
  st_as_sf() %>%
  left_join(mapData, by = c(NAME_1 = "Bundesland")) %>%
  left_join(hashtagHTML, by = c(NAME_1 = "Bundesland"))
  # dplyr::select() - waiting for dataframe


# Bin edges for the choropleth: 0 to 180000 in steps of 20000.
cuts <- seq(0, 180000, by = 20000)
# Binned "PuBu" palette over the tweet counts attached to the map.
pal <- colorBin(palette = "PuBu", domain = de_map$no_tweets, bins = cuts)

# Hard coded version
# One HTML popup string per map feature: the tweet count of the Bundesland
# followed by the top hashtags per party.
popup <- paste(
  "<strong>Anzahl der Tweets in </strong>",
  "<strong>", de_map$NAME_1, ": </strong>", "<br>", de_map$no_tweets,
  "<br>", "<br>",
  "<b> Top 3 Hashtags pro Partei: </b>", "<br>",
  # `html` comes from a left join and is NA for features without a match;
  # show nothing in that case instead of the literal text "NA".
  ifelse(is.na(de_map$html), "", de_map$html)
)

# Choropleth of tweet counts per Bundesland on a CartoDB Voyager base map.
# The polygons and the legend use the data passed to leaflet(), so it is not
# repeated in the layer calls; formulas are evaluated against that data.
mapTest <- leaflet(de_map) %>%
  addProviderTiles(providers$CartoDB.Voyager) %>%
  setView(lat = 50.9833118, lng = 10.4507147, zoom = 5) %>%
  addPolygons(
    color = "black",
    weight = 0.5,
    fillColor = ~pal(no_tweets),
    fillOpacity = 0.8,
    label = ~NAME_1,
    popup = popup
  ) %>%
  addLegend(
    position = "bottomright",
    pal = pal,
    values = ~no_tweets,
    title = "Anzahl der Tweets",
    opacity = 1
  )
mapTest

Pie Chart

# One row per element of `scrapedTweetsPerUser`: the number of rows scraped
# for that element, its name, and a label saying whether any data exists.
missingTwitterUsers <- tibble(User = scrapedTweetsPerUser) %>%
  mutate(
    tweets = map_int(User, nrow),
    usernames = names(scrapedTweetsPerUser),
    missing = ifelse(
      tweets == 0,
      "Fehlende Twitter Daten",
      "Twitter Daten vorhanden"
    )
  ) %>%
  # The list column was only needed to count rows.
  dplyr::select(-User)

# ggplot version
# Pie chart of the share of users with and without Twitter data. The bar
# height is the number of rows per `missing` category (geom_bar's default
# count stat); mapping y to the category label itself, as before, does not
# produce slices proportional to the counts.
ggplot(missingTwitterUsers, aes(x = "", fill = missing)) +
  geom_bar(width = 1) +
  coord_polar("y", start = 0)

# plotly version
# Slice colours keyed by the exact labels stored in `missing`. The previous
# key "Twitter Daten Vorhanden" did not match the data label, and the hex
# codes lacked the leading "#".
pieColors <- c(
  "Twitter Daten vorhanden" = "#000000",
  "Fehlende Twitter Daten" = "#ece7f2"
)

# Aggregate to one row per category so that the slice values are explicit.
missingTwitterUsers %>%
  dplyr::count(missing, name = "n_users") %>%
  plot_ly(
    labels = ~missing,
    values = ~n_users,
    type = "pie",
    # Valid textinfo flags are label, text, value and percent.
    textinfo = "percent",
    textposition = "inside",
    # Look the colours up by label so they follow the slice order; unname()
    # keeps the result a plain vector.
    marker = list(colors = ~unname(pieColors[missing]))
  ) %>%
  layout(title = "Anteil von vorhandenen Twitter Daten")
# to do: count is one, change color, add text and fix legend
# Account status per row of `mainData` (EU-Parlament excluded): rows without a
# match in `missingTwitterUsers` are labelled "Kein Account", party labels are
# collapsed into the plotting groups, and every row gets count = 1 so that
# stacked bars add up to the number of rows.
stackedBarData <- mainData %>%
  left_join(missingTwitterUsers, by = c(SM_Twitter_user = "usernames")) %>%
  filter(Kategorie != "EU-Parlament") %>%
  mutate(missing = replace_na(missing, "Kein Account")) %>%
  dplyr::select(Name, Partei, missing) %>%
  mutate(
    Partei = case_when(
      Partei %in% c("CSU", "CDU") ~ "CDU/CSU",
      Partei == "FDP/DVP" ~ "FDP",
      Partei %in% c("FW", "fraktionslos", "SSW", "BIW", "BVB/FW") ~ "Andere",
      TRUE ~ Partei
    )
  ) %>%
  group_by(Partei) %>%
  dplyr::mutate(count = 1)

# Strongly prefer fixed over scientific notation when numbers are printed.
options(scipen = 999)

# Stacked bar chart: one bar per party group, segments filled by account
# status, rendered interactively through plotly.
stackedBar <- ggplot(
  stackedBarData,
  aes(x = Partei, y = count, fill = missing)
) +
  geom_bar(stat = "identity", position = "stack") +
  scale_fill_manual("", values = c("#ece7f2", "#a6bddb", "#2b8cbe")) +
  labs(
    title = "Verteilung von Twitter Konten pro Partei",
    x = "",
    y = "Anzahl der Konten"
  ) +
  theme(panel.background = element_blank())

ggplotly(stackedBar)
# Native plotly version of the stacked bar chart: bars per party group,
# stacked and coloured by account status.
stackedBarData %>%
  plot_ly(
    type = "bar",
    x = ~Partei,
    y = ~count,
    color = ~missing,
    name = ~missing,
    hovertemplate = "my value: %{y}"
  ) %>%
  layout(
    title = "Verteilung von Twitter Konten pro Partei",
    barmode = "stack",
    yaxis = list(title = "count")
  )